﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace LDA
{
    /// <summary>
    /// Pairs a word with a probability. Instances order by descending
    /// probability, so sorting an array of them puts the most probable word first.
    /// </summary>
    public class Result : IComparable, IComparable<Result>
    {
        double prob;
        string word;

        /// <summary>The probability supplied to the constructor.</summary>
        public double Prob
        {
            get { return prob; }
        }

        /// <summary>The word supplied to the constructor.</summary>
        public string Word
        {
            get { return word; }
        }

        /// <summary>Creates a result holding the given probability and word.</summary>
        /// <param name="prob">Probability used as the sort key.</param>
        /// <param name="word">Word associated with the probability.</param>
        public Result(double prob, string word)
        {
            this.prob = prob;
            this.word = word;
        }

        /// <summary>
        /// Non-generic comparison; see <see cref="CompareTo(Result)"/> for the ordering.
        /// </summary>
        /// <param name="obj">Another <see cref="Result"/>, or null.</param>
        /// <returns>
        /// A negative value when this instance sorts first, a positive value when it
        /// sorts last, zero when the probabilities are equal. A null argument yields
        /// a positive value.
        /// </returns>
        /// <exception cref="ArgumentException"><paramref name="obj"/> is not a <see cref="Result"/>.</exception>
        public int CompareTo(Object obj)
        {
            // By IComparable convention every instance compares greater than null.
            if (obj == null)
            {
                return 1;
            }

            Result r = obj as Result;
            if (r == null)
            {
                throw new ArgumentException("Object must be of type Result.", "obj");
            }

            return CompareTo(r);
        }

        /// <summary>
        /// Compares by probability in descending order: the instance with the larger
        /// probability sorts first. NaN probabilities sort after every other value so
        /// that the ordering stays consistent for sorting.
        /// </summary>
        /// <param name="other">The result to compare with, or null.</param>
        /// <returns>
        /// A negative value when this instance sorts first, a positive value when it
        /// sorts last, zero when the probabilities are equal. A null argument yields
        /// a positive value.
        /// </returns>
        public int CompareTo(Result other)
        {
            if (other == null)
            {
                return 1;
            }

            // Operands are swapped on purpose to obtain descending order.
            return other.prob.CompareTo(prob);
        }
    }

    /// <summary>
    /// Latent Dirichlet Allocation estimated with a collapsed Gibbs sampler.
    /// Construct with a corpus, call <see cref="MCMC"/>, then read
    /// <see cref="Phi"/>, <see cref="Theta"/> and <see cref="TopicWords"/>.
    /// </summary>
    public class LDAGibbs
    {
        double[,] phi, theta;
        // Kept as double so fractional priors are usable; integer values produce
        // the same estimates as integer arithmetic did.
        double alpha, beta;
        int K, M, V;
        string[] vocabArray;
        int[][] W;
        int iterations;
        Random rand;
        Result[][] topicWords;

        /// <summary>
        /// For each topic, every vocabulary word paired with its phi value and sorted
        /// with <see cref="Array.Sort(Array)"/>. Null until <see cref="MCMC"/> has run.
        /// </summary>
        public Result[][] TopicWords
        {
            get { return topicWords; }
        }

        /// <summary>
        /// K x V matrix indexed [topic, word]. Filled by <see cref="MCMC"/>; all zeros before that.
        /// </summary>
        public double[,] Phi
        {
            get { return phi; }
        }

        /// <summary>
        /// M x K matrix indexed [document, topic]. Filled by <see cref="MCMC"/>; all zeros before that.
        /// </summary>
        public double[,] Theta
        {
            get { return theta; }
        }

        /// <summary>
        /// Creates a sampler with integer smoothing constants. Delegates to the
        /// overload taking <see cref="double"/> hyperparameters.
        /// </summary>
        /// <param name="alpha">Smoothing constant added to document-topic counts.</param>
        /// <param name="beta">Smoothing constant added to topic-word counts.</param>
        /// <param name="K">Number of topics; must be positive.</param>
        /// <param name="W">Corpus: W[m][n] is the index into <paramref name="vocabArray"/> of the n-th token of document m.</param>
        /// <param name="vocabArray">Vocabulary; its length is the vocabulary size V.</param>
        /// <param name="iterations">Number of Gibbs sweeps each <see cref="MCMC"/> call performs; values below 1 perform none.</param>
        /// <exception cref="ArgumentNullException"><paramref name="W"/> or <paramref name="vocabArray"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="K"/> is not positive.</exception>
        public LDAGibbs(int alpha, int beta, int K, int[][] W, string[] vocabArray, int iterations)
            : this((double)alpha, (double)beta, K, W, vocabArray, iterations)
        {
        }

        /// <summary>
        /// Creates a sampler with real-valued smoothing constants.
        /// </summary>
        /// <param name="alpha">Smoothing constant added to document-topic counts.</param>
        /// <param name="beta">Smoothing constant added to topic-word counts.</param>
        /// <param name="K">Number of topics; must be positive.</param>
        /// <param name="W">Corpus: W[m][n] is the index into <paramref name="vocabArray"/> of the n-th token of document m.</param>
        /// <param name="vocabArray">Vocabulary; its length is the vocabulary size V.</param>
        /// <param name="iterations">Number of Gibbs sweeps each <see cref="MCMC"/> call performs; values below 1 perform none.</param>
        /// <exception cref="ArgumentNullException"><paramref name="W"/> or <paramref name="vocabArray"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="K"/> is not positive.</exception>
        public LDAGibbs(double alpha, double beta, int K, int[][] W, string[] vocabArray, int iterations)
        {
            if (W == null)
            {
                throw new ArgumentNullException("W");
            }
            if (vocabArray == null)
            {
                throw new ArgumentNullException("vocabArray");
            }
            if (K <= 0)
            {
                throw new ArgumentOutOfRangeException("K", "The number of topics must be positive.");
            }

            this.alpha = alpha;
            this.beta = beta;
            this.K = K;
            this.W = W;
            this.vocabArray = vocabArray;
            M = W.Length;
            V = vocabArray.Length;
            phi = new double[K, V];
            theta = new double[M, K];
            this.iterations = iterations;
            rand = new Random();
        }

        /// <summary>
        /// Runs the sampler: assigns every token a random topic, performs the
        /// configured number of Gibbs sweeps (writing a countdown line to the
        /// console per sweep), then fills <see cref="Phi"/>, <see cref="Theta"/>
        /// and <see cref="TopicWords"/> from the final counts.
        /// Each call starts from a fresh random assignment.
        /// </summary>
        public void MCMC()
        {
            // New arrays are zero-initialised, so the count tables need no explicit clearing.
            int[][] zassign = new int[M][];   // topic of each token
            int[,] nmk = new int[M, K];       // tokens in document m assigned to topic k
            int[] nm = new int[M];            // tokens in document m
            int[,] nkv = new int[K, V];       // occurrences of word v assigned to topic k
            int[] nk = new int[K];            // tokens assigned to topic k

            // Random initial assignment.
            for (int m = 0; m < M; m++)
            {
                int N = W[m].Length;
                zassign[m] = new int[N];
                for (int n = 0; n < N; n++)
                {
                    int z = rand.Next(0, K);
                    int v = W[m][n];
                    nmk[m, z]++;
                    nm[m]++;
                    nkv[z, v]++;
                    nk[z]++;
                    zassign[m][n] = z;
                }
            }

            // Count down on a local copy so the configured iteration count survives
            // and a later call performs the same number of sweeps.
            for (int remaining = iterations; remaining > 0; remaining--)
            {
                Console.WriteLine("Iteration: {0}", remaining);
                for (int m = 0; m < M; m++)
                {
                    int N = W[m].Length;
                    for (int n = 0; n < N; n++)
                    {
                        int v = W[m][n];

                        // Remove the current token from the counts before resampling it.
                        int z = zassign[m][n];
                        nmk[m, z]--;
                        nm[m]--;
                        nkv[z, v]--;
                        nk[z]--;

                        z = SampleZ(nkv, nk, nmk, nm, m, n);

                        nmk[m, z]++;
                        nm[m]++;
                        nkv[z, v]++;
                        nk[z]++;
                        zassign[m][n] = z;
                    }
                }
            }

            // Smoothed point estimates from the final counts.
            Result[][] ranked = new Result[K][];
            for (int k = 0; k < K; k++)
            {
                ranked[k] = new Result[V];
                for (int v = 0; v < V; v++)
                {
                    phi[k, v] = (double)(nkv[k, v] + beta) / (double)(nk[k] + V * beta);
                    ranked[k][v] = new Result(phi[k, v], vocabArray[v]);
                }
                Array.Sort(ranked[k]);

                for (int m = 0; m < M; m++)
                {
                    theta[m, k] = (double)(nmk[m, k] + alpha) / (double)(nm[m] + K * alpha);
                }
            }
            topicWords = ranked;
        }

        /// <summary>
        /// Draws a topic for token n of document m with probability proportional to
        /// (nkv[k,v] + beta) / (nk[k] + V*beta) * (nmk[m,k] + alpha) / (nm[m] + K*alpha),
        /// where v = W[m][n]. The caller is expected to have removed the token from
        /// the counts beforehand.
        /// </summary>
        /// <returns>A topic index in [0, K).</returns>
        private int SampleZ(int[,] nkv, int[] nk, int[,] nmk, int[] nm, int m, int n)
        {
            int v = W[m][n];
            double[] p = new double[K];
            for (int k = 0; k < K; k++)
            {
                p[k] = (double)(nkv[k, v] + beta) / (double)(nk[k] + V * beta) *
                        (double)(nmk[m, k] + alpha) / (double)(nm[m] + K * alpha);
            }

            // Turn the weights into a running (unnormalised) cumulative sum.
            for (int k = 1; k < K; k++)
            {
                p[k] += p[k - 1];
            }

            double total = p[K - 1];
            if (!(total > 0.0) || double.IsInfinity(total))
            {
                // Degenerate weights (zero, NaN or infinite total, e.g. with a zero
                // smoothing constant and empty counts) cannot be sampled
                // proportionally; pick uniformly instead of returning an invalid index.
                return rand.Next(K);
            }

            // Scale the draw by the total because p[] is not normalised.
            double u = rand.NextDouble() * total;
            for (int z = 0; z < K; z++)
            {
                if (p[z] > u)
                {
                    return z;
                }
            }

            // Only reachable if rounding leaves u equal to the total; the last topic
            // is the correct bucket in that case.
            return K - 1;
        }
    }
}
